
# Load packages and data --------------------------------------------------

# Attach every package the script depends on. library() stops with an error
# when a package is not installed, whereas require() only warns and returns
# FALSE, which lets the script run on and fail later with an unrelated-looking
# "could not find function" message. The attach order is unchanged so that
# function masking between packages stays the same.
library(foreign)
library(pastecs)
library(stringr)
library(dplyr)
library(fastDummies)
library(tidyr)
library(ggplot2)
library(forcats)
library(egg)
library(scales)
library(cowplot)
library(dotwhisker)
library(broom)

# Cleaned enumerator survey; the path is resolved against the working directory.
df <- read.csv("Enumerators_Cleaned_CPS.csv")


# Table 1: Summary Statistics ---------------------------------------------

# Columns of df that are summarised in Table 1.
vars <- c(
  "age", "dept_abidjan", "sex", "akan", "krou", "nmande", "smande",
  "voltaique", "christian", "edu", "govt", "experience_years", "politics",
  "N_DeptWork", "projectN"
)

# Display labels, listed in the same order as `vars`.
var.names <- c(
  "Age", "From Abidjan", "Sex", "Akan", "Krou", "North Mandé", "South Mandé",
  "Voltaïque", "Christian", "Education", "Govt supporter", "Experience",
  "Political Projects", "Number of depts worked", "Projects N"
)

# stat.desc() returns one row per statistic; transpose so that each variable
# becomes a row, then keep only the statistics reported in the table.
kept_stats <- c("mean", "min", "max", "std.dev", "nbr.val")
summary_stats <- as.data.frame(t(stat.desc(df[vars]))[, kept_stats])

# Put the label column first and rename the columns for presentation.
summary_stats$desc <- var.names
summary_stats <- summary_stats[c("desc", kept_stats)]
colnames(summary_stats) <- c("description", "mean", "min", "max", "std.dev", "n")


# Figure 1: Fear and felt insecurity --------------------------------------

# Recode the two safety items as factors so that dummy_cols() creates one 0/1
# indicator column per answer category.
safety_dum <- df %>%
  select(safe, challenges_5) %>%
  mutate_if(is.numeric, as.factor)

safety_dum <- fastDummies::dummy_cols(safety_dum)

# Mean of each indicator column = share of respondents giving that answer.
# Indicator columns are named "<item>_<level>", so the item is the first token
# of the name and the answer level is the last one. The level is derived from
# the name instead of a hard-coded positional vector, so it no longer depends
# on how many rows summarise() returns or on their sort order.
safety_dum <- safety_dum %>%
  select(safe_0:challenges_5_4) %>%
  pivot_longer(safe_0:challenges_5_4) %>%
  group_by(name) %>%
  summarise_at("value", mean, na.rm = TRUE) %>%
  filter(!grepl("NA", name)) %>%
  mutate(
    var = str_split(name, "_", simplify = TRUE)[, 1],
    agree = sub("^.*_", "", name)
  )

# Horizontal stacked bars, one bar per item, one segment per answer category.
ggplot(safety_dum, aes(x = forcats::fct_rev(var), y = value * 100, fill = factor(agree))) +
  geom_bar(position = position_stack(), stat = "identity", width = .5) +
  scale_fill_manual(
    values = c("#FAEBDDFF", "#CB1B4FFF", "#AE1759FF", "#8E1D5BFF", "#701F57FF"),
    name = "",
    labels = c("Never", "Sometimes", "Half the time", "Most of the time", "Always"),
    guide = guide_legend(reverse = TRUE)
  ) +
  scale_x_discrete(
    breaks = c("challenges", "safe"),
    labels = c("Feared physical safety", "Felt unsafe")
  ) +
  theme_minimal() +
  xlab("") + ylab("Proportion (%)") +
  coord_flip() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Figure 2: Enumerator reported fear and exposure to violence -------------


# Share of enumerators who witnessed each type of violence.
# NOTE(review): contains() ignores case by default, so Violence_Witness_Any is
# selected and averaged here, while the axis breaks further down are built with
# a case-sensitive grepl() on "violence_witness_". The "Any" bar therefore
# appears to be drawn without an axis label -- confirm this is intended.
violence_witness <- df %>%
  select(contains("violence_witness_")) %>%
  pivot_longer(violence_witness_1:Violence_Witness_Any) %>%
  group_by(name) %>%
  summarise_at("value", mean, na.rm = TRUE)

violence_witness$reason <- forcats::fct_rev(violence_witness$name)

# Personal exposure items: recode as factors so that dummy_cols() creates one
# 0/1 indicator column per answer category.
violence_dum <- df %>%
  select(violence_scale_1:violence_scale_4) %>%
  mutate_if(is.numeric, as.factor)

violence_dum <- fastDummies::dummy_cols(violence_dum)

# Share of respondents per item and answer category. Every selected indicator
# column is pivoted (the original pivoted a slightly narrower range than it
# selected); the "_NA" indicators are dropped by the filter below either way.
violence_dum <- violence_dum %>%
  select(violence_scale_1_0:violence_scale_4_NA) %>%
  pivot_longer(everything()) %>%
  group_by(name) %>%
  summarise_at("value", mean, na.rm = TRUE) %>%
  filter(!grepl("NA", name)) %>%
  mutate(
    var = str_split(name, "_", simplify = TRUE)[, 3],
    agree = str_split(name, "_", simplify = TRUE)[, 4]
  )

# Top panel: stacked bars of personal exposure to violence.
violence_dum_plot <- ggplot(violence_dum, aes(x = forcats::fct_rev(var), y = value * 100, fill = factor(agree))) +
  geom_bar(position = position_stack(), stat = "identity", width = .5) +
  #geom_text(aes(label=paste0(round(value*100,0), "%")), position = position_stack(vjust = 0.5), fontface="bold",color="white", size=3.5)+
  scale_fill_manual(
    values = c("#FAEBDDFF", "#CB1B4FFF", "#AE1759FF", "#8E1D5BFF", "#701F57FF"),
    name = "",
    labels = c("Never", "Once", "A few times", "Multiple times", "Always"),
    guide = guide_legend(reverse = TRUE)
  ) +
  # `var` holds the item number as a string, so the breaks are given as
  # strings too (same convention as the other stacked-bar figures).
  scale_x_discrete(
    breaks = c("1", "2", "3", "4"),
    labels = c("Been followed", "Robbed", "Threatened with violence", "Physically assaulted")
  ) +
  theme_minimal() +
  #labs(title="Candidate Ethnicity")  +
  xlab("") + ylab("Proportion (%)") +
  coord_flip() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    text = element_text(size = 20),
    plot.title = element_text(hjust = .5, size = 20)
  ) +
  ylim(0, 100.5) + ggtitle("Exposure to personal violence")

# Bottom panel: share who witnessed each type of violence, sorted by size.
violence_witness_plot <- ggplot(violence_witness, aes(x = reorder(factor(reason), value), y = value * 100)) +
  geom_bar(stat = "identity", width = .5) +
  #geom_text(aes(label=paste0(round(value*100,0), "%")), fontface="bold",hjust=1.1, vjust=.5, color="white", size=4)+
  theme_minimal() +
  scale_x_discrete(
    breaks = c(colnames(df)[grepl("violence_witness_", colnames(df))]),
    labels = c("Mass protest", "Threats and harassment", "Physical violence", "Theft or destruction of property")
  ) +
  xlab("") + ylab("") +
  coord_flip() +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(hjust = .5, size = 20)
  ) +
  ylim(0, 100.5) + ggtitle("Witnessed violence (Yes)")


ggarrange(violence_dum_plot,
          violence_witness_plot, nrow = 2)



# Figure 3: Distribution of enumerator responses to violence/fear --------

# Theme settings shared by both panels of Figure 3.
fig3_theme <- theme(
  text = element_text(size = 15),
  axis.title.y = element_text(size = 15),
  legend.text = element_text(size = 15),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank(),
  legend.title = element_text(size = 15)
)

# bottom panel: violence-affected communities
# Relabel the 0-5 codes as shares, then compute the share of rows per category.
c_h_data <- df %>%
  mutate(
    work_sum_any_violence_share_x_abi_mutated = factor(
      work_sum_any_violence_share_x_abi_mutated,
      levels = c(0, 1, 2, 3, 4, 5),
      labels = c("0", "0.2", "0.4", "0.6", "0.8", "1.00")
    )
  ) %>%
  group_by(work_sum_any_violence_share_x_abi_mutated) %>%
  summarise(N = n(), .groups = "drop") %>%
  mutate(value = N / sum(N))

c_h <- ggplot(c_h_data, aes(x = factor(work_sum_any_violence_share_x_abi_mutated), y = value * 100)) +
  geom_bar(stat = "identity", width = .5) +
  geom_text(
    aes(label = paste0(round(value * 100, 0), "%")),
    fontface = "bold", hjust = 1.1, vjust = .5, color = "white", size = 5
  ) +
  theme_minimal() +
  xlab("") + ylab("") +
  fig3_theme +
  ylim(0, 100) +
  ggtitle("Share of violence-affected communities worked") +
  coord_flip()

#top panel: index
# Share of rows at each value of the violence experience/fear index.
i_h_data <- df %>%
  group_by(violence_index1) %>%
  summarise(N = n(), .groups = "drop") %>%
  mutate(value = N / sum(N))

i_h <- ggplot(i_h_data, aes(x = factor(violence_index1), y = value * 100)) +
  geom_bar(stat = "identity", width = .5) +
  geom_text(
    aes(label = paste0(round(value * 100, 0), "%")),
    fontface = "bold", hjust = 1.1, vjust = .5, color = "white", size = 5
  ) +
  theme_minimal() +
  xlab("") + ylab("") +
  fig3_theme +
  ylim(0, 100) +
  ggtitle("Violence Experience/Fear Index") +
  coord_flip()


ggarrange(i_h, c_h, nrow = 2)


# Figure 4: Challenges and explanations -----------------------------------

# Theme settings shared by both panels of Figure 4.
fig4_theme <- theme(
  axis.title.x = element_blank(),
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank(),
  axis.text.y = element_text(size = 15),
  axis.title.y = element_text(size = 15),
  legend.text = element_text(size = 15),
  legend.title = element_text(size = 15)
)

# left panel: challenges
# Take matches 2-10 of the "challenges_" columns, recode each answer to a 0/1
# indicator (1 = any value of at least 1) and average the indicator per item.
chal_binary <- df %>%
  select(contains("challenges_")[2:10]) %>%
  lapply(function(x) ifelse(x >= 1, 1, 0)) %>%
  data.frame()

df_chal <- chal_binary %>%
  pivot_longer(challenges_1:challenges_9) %>%
  group_by(name) %>%
  summarise(value = mean(value, na.rm = TRUE))
df_chal$name <- forcats::fct_rev(df_chal$name)

# Axis breaks are the same nine column names, looked up directly in df.
chal_breaks <- colnames(df)[grepl("^challenges_", colnames(df))][2:10]
chal_labels <- c(
  "Insufficient Pay ",
  "Insufficient food/accommodation ",
  "Limited cell phone reception ",
  "Difficult travel conditions ",
  "Feared physical safety in assigned region",
  "Uncomfortable working in assigned region",
  "Time commitment was too long",
  "Required to travel too much",
  "Disputes with the research team"
)

plot3 <- ggplot(df_chal, aes(x = reorder(factor(name), value), y = value * 100)) +
  geom_bar(stat = "identity", width = .5) +
  geom_text(
    aes(label = paste0(round(value * 100, 0), "%")),
    fontface = "bold", hjust = .85, vjust = .5, color = "white", size = 5
  ) +
  theme_minimal() +
  scale_x_discrete(breaks = chal_breaks, labels = chal_labels) +
  xlab("") + ylab("(%)") +
  coord_flip() +
  fig4_theme +
  ylim(0, 100)


# right panel: explanations
# Mean of each "safe_why_" column, i.e. the share citing each reason.
safe_why <- df %>%
  select(contains("safe_why_")) %>%
  pivot_longer(safe_why_1:safe_why_8) %>%
  group_by(name) %>%
  summarise(value = mean(value, na.rm = TRUE))

safe_why$name <- forcats::fct_rev(safe_why$name)

# The fifth matching column is left out of the labelled breaks.
why_breaks <- colnames(df)[grepl("safe_why_", colnames(df))][c(1:4, 6:8)]
why_labels <- c(
  "Political environment",
  "General feeling of unwelcome",
  "Isolation of fieldsite",
  "Harassment by local authorities",
  "Harassment by respondents",
  "Violence at field site",
  "High crime rate of area"
)

plot4 <- ggplot(safe_why, aes(x = reorder(factor(name), value), y = value * 100)) +
  geom_bar(stat = "identity", width = .5) +
  geom_text(
    aes(label = paste0(round(value * 100, 0), "%")),
    fontface = "bold", hjust = 1, vjust = .5, color = "white", size = 5
  ) +
  theme_minimal() +
  scale_x_discrete(breaks = why_breaks, labels = why_labels) +
  xlab("") + ylab("(%)") +
  coord_flip() +
  fig4_theme +
  ylim(0, 100)


ggarrange(plot3, plot4, ncol = 2, labels = c("Challenges enumerators face", "Why felt unsafe?"))




# Figure 5: Relationship between insecurity and challenges to safe --------

# Names of the local-challenge indicator columns.
dvs.c2 <- colnames(df)[grepl("Local_Chall_", colnames(df))]

# Y-axis breaks and display labels used by both heat-map panels.
local_chall_breaks <- c(
  "Local_Chall_Chiefs_Dummy", "Local_Chall_Party_Dummy", "Local_Chall_Mayor_Dummy",
  "Local_Chall_Prefet_Dummy", "Local_Chall_Police_Dummy", "Local_Chall_Youth_Dummy",
  "Local_Chall_RespFam_Dummy", "Local_Chall_Respondent_Dummy", "Local_Chall_Other_Dummy"
)
local_chall_labels <- c(
  "Chiefs", "Party \nleaders", "Mayors", "Prefets", "Police", "Youth",
  "Respondent \nfamilies", "Respondent", "Other \ncommunity members"
)

# right panel: violent communities worked
# Sum each challenge indicator within every category of the share variable.
vc_chal_data <- df %>%
  mutate(
    work_sum_any_violence_share_x_abi_mutated = factor(
      work_sum_any_violence_share_x_abi_mutated,
      levels = c(0, 1, 2, 3, 4, 5),
      labels = c("0", "0.2", "0.4", "0.6", "0.8", "1.00")
    )
  ) %>%
  filter(!is.na(work_sum_any_violence_share_x_abi_mutated)) %>%
  group_by(work_sum_any_violence_share_x_abi_mutated) %>%
  summarise(across(all_of(dvs.c2), ~ sum(.x, na.rm = TRUE))) %>%
  pivot_longer(Local_Chall_Chiefs_Dummy:Local_Chall_Other_Dummy)

vc_chal <- ggplot(
  vc_chal_data,
  aes(x = work_sum_any_violence_share_x_abi_mutated, y = forcats::fct_rev(name))
) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  #scale_fill_gradientn(colors = viridis_pal(direction=-1, begin = 0, end = 0.8, option="rocket")(23))+
  scale_y_discrete(breaks = local_chall_breaks, labels = local_chall_labels) +
  labs(y = "", x = "Share of violence affected communities worked") +
  ggtitle("Violent Communities Worked") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    text = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    # y-axis text is hidden because the left panel already shows it
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.title = element_text(size = 15),
    axis.line = element_line(colour = "black")
  )

# left panel: violence experience
# Same sums, grouped by the violence experience/fear index instead.
vi_chal_data <- df %>%
  filter(!is.na(violence_index1)) %>%
  group_by(violence_index1) %>%
  summarise(across(all_of(dvs.c2), ~ sum(.x, na.rm = TRUE))) %>%
  pivot_longer(Local_Chall_Chiefs_Dummy:Local_Chall_Other_Dummy)

vi_chal <- ggplot(vi_chal_data, aes(x = violence_index1, y = forcats::fct_rev(name))) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  scale_y_discrete(breaks = local_chall_breaks, labels = local_chall_labels) +
  labs(y = "Local Challenges", x = "Violence Experience/Fear Index") +
  scale_x_continuous(breaks = pretty_breaks()) +
  ggtitle("Violence Experience") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    text = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 15),
    axis.line = element_line(colour = "black")
  )

ggarrange(vi_chal, vc_chal, ncol = 2)


# Figure 6: Relationship between insecurity and data coll -----------------

# First six data-collection difficulty columns.
dvs.dif <- c(colnames(df)[grepl("difficulties_coll", colnames(df))][1:6])

# Left panel: sum of each difficulty item by violence experience/fear index.
# all_of() is used because dvs.dif is an external character vector; passing it
# bare to a tidyselect function is deprecated and is ambiguous if the data ever
# has a column called "dvs.dif".
vi_coll <- df %>%
  group_by(violence_index1) %>%
  filter(!is.na(violence_index1)) %>%
  summarise_at(dvs.dif, sum, na.rm = TRUE) %>%
  pivot_longer(all_of(dvs.dif)) %>%
  ggplot(aes(x = violence_index1, y = forcats::fct_rev(name))) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  scale_y_discrete(
    breaks = c(dvs.dif),
    labels = c(
      "Complicated Qs", "Not relevant", "Rephrasing \nneeded",
      "Too sensitive", "Culturally \nirrelevant",
      "Psychologically \nchallenging"
    )
  ) +
  labs(y = "Collection difficulties", x = "Violence Experience/Fear Index") +
  scale_x_continuous(breaks = pretty_breaks()) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    text = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 15),
    axis.line = element_line(colour = "black")
  ) +
  ggtitle("Violence Experience/Fear Index")


# Right panel: the same sums by share of violence-affected communities worked.
# The 0-5 codes are relabelled as shares; the y-axis text is hidden because the
# left panel already shows it.
vc_coll <- df %>%
  mutate(
    work_sum_any_violence_share_x_abi_mutated = factor(
      work_sum_any_violence_share_x_abi_mutated,
      levels = c(0, 1, 2, 3, 4, 5),
      labels = c("0", "0.2", "0.4", "0.6", "0.8", "1.00")
    )
  ) %>%
  group_by(work_sum_any_violence_share_x_abi_mutated) %>%
  filter(!is.na(work_sum_any_violence_share_x_abi_mutated)) %>%
  summarise_at(dvs.dif, sum, na.rm = TRUE) %>%
  pivot_longer(all_of(dvs.dif)) %>%
  ggplot(aes(x = work_sum_any_violence_share_x_abi_mutated, y = forcats::fct_rev(name))) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  scale_y_discrete(
    breaks = c(dvs.dif),
    labels = c(
      "Complicated Qs", "Not relevant", "Rephrasing \nneeded",
      "Too sensitive", "Culturally \nirrelevant",
      "Psychologically \nchallenging"
    )
  ) +
  labs(y = "", x = "Share of violence affected communities worked") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    text = element_text(size = 15),
    axis.text.y = element_blank(), axis.ticks.y = element_blank(),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 15),
    axis.line = element_line(colour = "black")
  ) +
  ggtitle("Violent Communities Worked")

ggarrange(vi_coll, vc_coll, ncol = 2)



# Figure 8: To deal with challenges, how often do ... ---------------------

# Five-step red/purple palette used for the frequency scale; the likelihood
# scale in the bottom panel extends it by two darker steps.
freq_palette <- c("#FAEBDDFF", "#CB1B4FFF", "#AE1759FF", "#8E1D5BFF", "#701F57FF")

# top panel
# One 0/1 indicator column per break_protocol item and answer category.
break_protocol_dum <- df %>%
  select(break_protocol_1:break_protocol_8) %>%
  mutate(across(where(is.numeric), as.factor))

break_protocol_dum <- fastDummies::dummy_cols(break_protocol_dum)

# Share of respondents per item/category. Indicator names look like
# "break_protocol_<item>_<level>", so tokens 3 and 4 hold item and level.
break_protocol_dum <- break_protocol_dum %>%
  select(break_protocol_1_0:break_protocol_8_NA) %>%
  pivot_longer(break_protocol_1_0:break_protocol_8_NA) %>%
  group_by(name) %>%
  summarise(value = mean(value, na.rm = TRUE)) %>%
  filter(!grepl("NA", name))

break_tokens <- str_split(break_protocol_dum$name, "_", simplify = TRUE)
break_protocol_dum <- break_protocol_dum %>%
  mutate(var = break_tokens[, 3], agree = break_tokens[, 4])


break_protocol_dum_plot <- ggplot(
  break_protocol_dum,
  aes(x = forcats::fct_rev(var), y = value * 100, fill = factor(agree))
) +
  geom_bar(position = position_stack(), stat = "identity", width = .5) +
  scale_fill_manual(
    values = freq_palette,
    name = "",
    labels = c("Never", "Sometimes", "Half the time", "Most of the time", "Always"),
    guide = guide_legend(reverse = TRUE)
  ) +
  scale_x_discrete(
    breaks = c("1", "2", "3", "4", "5", "6", "7", "8"),
    labels = c(
      "Skip HH", "Fabricate answers",
      "Adapt or rephrase",
      "Abandon",
      "Deviate random-walk",
      "Lie to supervisor",
      "Select easier respondent",
      "Ease questions"
    )
  ) +
  theme_minimal() +
  xlab("") + ylab("Proportion (%)") +
  coord_flip() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 15)
  ) +
  theme(aspect.ratio = 1 / 1.15)

# bottom panel
# Same construction for the solution_threats items (answer levels 0-6).
solutions_dum <- df %>%
  select(solution_threats_1:solution_threats_5) %>%
  mutate(across(where(is.numeric), as.factor))

solutions_dum <- fastDummies::dummy_cols(solutions_dum)

solutions_dum <- solutions_dum %>%
  select(solution_threats_1_0:solution_threats_5_6) %>%
  pivot_longer(solution_threats_1_0:solution_threats_5_6) %>%
  group_by(name) %>%
  summarise(value = mean(value, na.rm = TRUE)) %>%
  filter(!grepl("NA", name))

solution_tokens <- str_split(solutions_dum$name, "_", simplify = TRUE)
solutions_dum <- solutions_dum %>%
  mutate(var = solution_tokens[, 3], agree = solution_tokens[, 4])


solutions_dum_plot <- ggplot(
  solutions_dum,
  aes(x = forcats::fct_rev(var), y = value * 100, fill = factor(agree))
) +
  geom_bar(position = position_stack(), stat = "identity", width = .5) +
  #geom_text(aes(label=paste0(round(value*100,0), "%")), position = position_stack(vjust = 0.5), fontface="bold",color="white", size=3)+
  scale_fill_manual(
    values = c(freq_palette, "#521E4DFF", "#36193EFF"),
    name = "",
    labels = c(
      "Extremely unlikely",
      "Moderately unlikely",
      "Slightly unlikely",
      "Neither likely nor unlikely",
      "Slightly likely",
      "Moderately likely",
      "Extremely likely"
    ),
    guide = guide_legend(reverse = TRUE)
  ) +
  scale_x_discrete(
    breaks = c("1", "2", "3", "4", "5"),
    labels = c(
      "Skip HH",
      "Fill in answers",
      "Skip questions",
      "Choose easier respondents",
      "Abbreviate questions/consent"
    )
  ) +
  theme_minimal() +
  #labs(title="Candidate Ethnicity")  +
  xlab("") + ylab("Proportion (%)") +
  coord_flip() +
  theme(
    axis.title.x = element_blank(),
    #axis.text.x=element_blank(),
    #axis.ticks.x=element_blank(),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 15)
  ) +
  theme(aspect.ratio = 1 / 1.15)


# Stack the two panels, each with a centred title.
centred_title <- theme(plot.title = element_text(hjust = .5, size = 20))
plot_grid(
  break_protocol_dum_plot + ggtitle("Others break protocol") + centred_title,
  solutions_dum_plot + ggtitle("Self break protocol") + centred_title,
  nrow = 2, align = "vh"
)



# Figure 9: Experience with violence and others break protocol --------

# All columns whose name contains "break_protocol".
dvs.b <- colnames(df)[grepl("break_protocol", colnames(df))]

# Theme settings common to both heat-map panels.
fig9_theme <- theme(
  panel.border = element_blank(),
  panel.grid.major = element_blank(),
  legend.position = "bottom",
  panel.grid.minor = element_blank(),
  text = element_text(size = 15),
  axis.title.y = element_text(size = 15),
  legend.text = element_text(size = 10),
  legend.title = element_text(size = 15),
  axis.line = element_line(colour = "black")
)

# Left panel: sum of each break_protocol item by violence experience/fear index.
vi_others_data <- df %>%
  filter(!is.na(violence_index1)) %>%
  group_by(violence_index1) %>%
  summarise(across(all_of(dvs.b), ~ sum(.x, na.rm = TRUE))) %>%
  pivot_longer(break_protocol_1:break_protocol_8)

vi_others <- ggplot(vi_others_data, aes(x = violence_index1, y = forcats::fct_rev(name))) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  scale_y_discrete(
    breaks = dvs.b,
    labels = c(
      "Skip HH",
      "Fabricate \nanswers",
      "Adapt or \nrephrase",
      "Abandon",
      "Deviate \nrandom-walk",
      "Lie to \nsupervisor",
      "Select easier \nrespondent",
      "Ease \nquestions"
    )
  ) +
  labs(y = "Others' Protocol Breaches", x = "Violence Experience/Fear Index") +
  scale_x_continuous(breaks = pretty_breaks()) +
  theme_bw() +
  fig9_theme +
  ggtitle("Violence Experience/Fear Index")


# Right panel: the same sums by share of violence-affected communities worked
# (0-5 codes relabelled as shares).
vc_others_data <- df %>%
  mutate(
    work_sum_any_violence_share_x_abi_mutated = factor(
      work_sum_any_violence_share_x_abi_mutated,
      levels = c(0, 1, 2, 3, 4, 5),
      labels = c("0", "0.2", "0.4", "0.6", "0.8", "1.00")
    )
  ) %>%
  filter(!is.na(work_sum_any_violence_share_x_abi_mutated)) %>%
  group_by(work_sum_any_violence_share_x_abi_mutated) %>%
  summarise(across(all_of(dvs.b), ~ sum(.x, na.rm = TRUE))) %>%
  pivot_longer(break_protocol_1:break_protocol_8)

vc_others <- ggplot(
  vc_others_data,
  aes(x = work_sum_any_violence_share_x_abi_mutated, y = forcats::fct_rev(name))
) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  scale_y_discrete(
    breaks = dvs.b,
    labels = c(
      "Skip HH", "Fabricate answers",
      "Adapt or rephrase",
      "Abandon",
      "Deviate random-walk",
      "Lie to supervisor",
      "Select easier respondent",
      "Ease questions"
    )
  ) +
  labs(y = "", x = "Share of violence affected communities worked") +
  theme_bw() +
  fig9_theme +
  # y-axis text is hidden because the left panel already shows it
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
  ggtitle("Violent Communities Worked")


ggarrange(vi_others, vc_others, ncol = 2)




# Figure 10: experience with violence and self break ----------------------

# The five solution_threats items. The match is restricted to the first five
# columns, as the Table A4 code in this script does: the axis below supplies
# exactly five labels, and ggplot2 requires breaks and labels of equal length,
# so any additional column whose name contains "solution_threats" would
# otherwise break the plot.
dvs.s <- c(colnames(df)[grepl("solution_threats", colnames(df))])[1:5]

# NOTE(review): Figure 8 treats solution_threats_* as 0-6 scales, so the
# tile fill below is a sum of raw responses rather than a head count, although
# the legend reads "Count of enumerators" -- confirm which is intended.

# Left panel: sum of each item by violence experience/fear index.
vi_self <- df %>%
  group_by(violence_index1) %>%
  filter(!is.na(violence_index1)) %>%
  summarise_at(dvs.s, sum, na.rm = TRUE) %>%
  pivot_longer(solution_threats_1:solution_threats_5) %>%
  ggplot(aes(x = violence_index1, y = forcats::fct_rev(name))) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  scale_y_discrete(
    breaks = c(dvs.s),
    labels = c(
      "Skip HH",
      "Fill in \nanswers",
      "Skip \nquestions",
      "Choose easier \nrespondents",
      "Abbreviate \nquestions/consent"
    )
  ) +
  labs(y = "Self Break Protocol", x = "Violence Experience/Fear Index") +
  scale_x_continuous(breaks = pretty_breaks()) +
  theme_bw() +
  theme(
    panel.border = element_blank(), panel.grid.major = element_blank(),
    legend.position = "bottom", panel.grid.minor = element_blank(),
    text = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 15),
    axis.line = element_line(colour = "black")
  ) +
  ggtitle("Violence Experience/Fear Index")


# Right panel: the same sums by share of violence-affected communities worked
# (0-5 codes relabelled as shares); y-axis text is hidden because the left
# panel already shows it.
vc_self <- df %>%
  mutate(
    work_sum_any_violence_share_x_abi_mutated = factor(
      work_sum_any_violence_share_x_abi_mutated,
      levels = c(0, 1, 2, 3, 4, 5),
      labels = c("0", "0.2", "0.4", "0.6", "0.8", "1.00")
    )
  ) %>%
  group_by(work_sum_any_violence_share_x_abi_mutated) %>%
  filter(!is.na(work_sum_any_violence_share_x_abi_mutated)) %>%
  summarise_at(dvs.s, sum, na.rm = TRUE) %>%
  pivot_longer(solution_threats_1:solution_threats_5) %>%
  ggplot(aes(x = work_sum_any_violence_share_x_abi_mutated, y = forcats::fct_rev(name))) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  scale_y_discrete(
    breaks = c(dvs.s),
    labels = c(
      "Skip HH",
      "Fill in answers",
      "Skip questions",
      "Choose easier respondents",
      "Abbreviate questions/consent"
    )
  ) +
  labs(y = "",
       x = "Share of violence affected communities worked") +
  theme_bw() +
  theme(
    panel.border = element_blank(), panel.grid.major = element_blank(),
    legend.position = "bottom", panel.grid.minor = element_blank(),
    text = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.title = element_text(size = 15),
    axis.line = element_line(colour = "black")
  ) +
  ggtitle("Violent Communities Worked")

plot_grid(vi_self, vc_self)


# Figure 11: How often do you emphasize the following ---------------------

# One 0/1 indicator column per affiliation item and answer category.
affiliations_dum <- df %>%
  select(affiliations_1:affiliations_3) %>%
  mutate(across(where(is.numeric), as.factor))

affiliations_dum <- fastDummies::dummy_cols(affiliations_dum)

# Share of respondents per item/category. Indicator names look like
# "affiliations_<item>_<level>", so tokens 2 and 3 hold item and level.
affiliations_dum <- affiliations_dum %>%
  select(affiliations_1_0:affiliations_3_4) %>%
  pivot_longer(affiliations_1_0:affiliations_3_4) %>%
  group_by(name) %>%
  summarise(value = mean(value, na.rm = TRUE)) %>%
  filter(!grepl("NA", name))

affil_tokens <- str_split(affiliations_dum$name, "_", simplify = TRUE)
affiliations_dum <- affiliations_dum %>%
  mutate(var = affil_tokens[, 2], agree = affil_tokens[, 3])


# Horizontal stacked bars, one bar per affiliation.
ggplot(
  affiliations_dum,
  aes(x = forcats::fct_rev(var), y = value * 100, fill = factor(agree))
) +
  geom_bar(position = position_stack(), stat = "identity", width = .5) +
  ggtitle("How often do you emphasize the following...") +
  scale_fill_manual(
    values = c("#FAEBDDFF", "#CB1B4FFF", "#AE1759FF", "#8E1D5BFF", "#701F57FF"),
    name = "",
    labels = c("Never", "Sometimes", "Half the time", "Most of the time", "Always"),
    guide = guide_legend(reverse = TRUE)
  ) +
  scale_x_discrete(
    breaks = c("1", "2", "3"),
    labels = c("My ethnicity", "My partisanship", "My religion")
  ) +
  theme_minimal() +
  xlab("") + ylab("Proportion (%)") +
  coord_flip() +
  theme(
    axis.title.x = element_blank(),
    text = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.text.x = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 15)
  ) +
  theme(aspect.ratio = 1 / 1.15)



# Figure 12: Experience with violence and affiliations --------------------

# Affiliation items shown on the y-axis.
dvs.a <- c("affiliations_1", "affiliations_2", "affiliations_3")

# NOTE(review): Figure 11 treats affiliations_* as 0-4 frequency scales, so the
# tile fill below is a sum of raw responses rather than a head count, although
# the legend reads "Count of enumerators" -- confirm which is intended.

# Left panel: sum of each item by violence experience/fear index.
# all_of() is used because dvs.a is an external character vector; passing it
# bare to a tidyselect function is deprecated.
vi_affil <- df %>%
  group_by(violence_index1) %>%
  filter(!is.na(violence_index1)) %>%
  summarise_at(dvs.a, sum, na.rm = TRUE) %>%
  pivot_longer(all_of(dvs.a)) %>%
  ggplot(aes(x = violence_index1, y = forcats::fct_rev(name))) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  scale_y_discrete(
    breaks = c(dvs.a),
    labels = c(
      "My \nethnicity",
      "My \npartisanship",
      "My \nreligion"
    )
  ) +
  labs(y = "Affiliations", x = "Violence Experience/Fear Index") +
  scale_x_continuous(breaks = pretty_breaks()) +
  theme_bw() +
  theme(
    panel.border = element_blank(), panel.grid.major = element_blank(),
    legend.position = "bottom", panel.grid.minor = element_blank(),
    text = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 15),
    axis.line = element_line(colour = "black")
  ) +
  ggtitle("Violence Experience/Fear Index")


# Right panel: the same sums by share of violence-affected communities worked
# (0-5 codes relabelled as shares); y-axis text is hidden because the left
# panel already shows it.
vc_affil <- df %>%
  mutate(
    work_sum_any_violence_share_x_abi_mutated = factor(
      work_sum_any_violence_share_x_abi_mutated,
      levels = c(0, 1, 2, 3, 4, 5),
      labels = c("0", "0.2", "0.4", "0.6", "0.8", "1.00")
    )
  ) %>%
  group_by(work_sum_any_violence_share_x_abi_mutated) %>%
  filter(!is.na(work_sum_any_violence_share_x_abi_mutated)) %>%
  summarise_at(dvs.a, sum, na.rm = TRUE) %>%
  pivot_longer(all_of(dvs.a)) %>%
  ggplot(aes(x = work_sum_any_violence_share_x_abi_mutated, y = forcats::fct_rev(name))) +
  geom_tile(aes(fill = value), color = "ivory") +
  scale_fill_viridis_c(name = "Count of enumerators", option = "rocket", direction = -1) +
  scale_y_discrete(
    breaks = c(dvs.a),
    labels = c("My ethnicity", "My partisanship", "My religion")
  ) +
  labs(y = "", x = "Share of violence affected communities worked") +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    text = element_text(size = 15),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 15),
    axis.line = element_line(colour = "black")
  ) +
  ggtitle("Violent Communities Worked")

plot_grid(vi_affil, vc_affil)

# Appendix ----------------------------------------------------------------

## The Appendix was created using RMarkdown

# Table A1: Response rates by organization --------------------------------

# Anonymised sampling frame; the path is resolved against the working directory.
df2 <- read.csv("anon_sample_frame_CPS.csv")

# Mean of `completed` per organisation, rounded to two decimals.
df2 %>%
  group_by(org) %>%
  dplyr::summarise(completed = mean(completed)) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))

# Table A2: Distribution of ethnicity in sampling frame --------------------------------

# Mean of every column from akan through nmande, rounded to two decimals.
df2 %>%
  dplyr::summarise(across(akan:nmande, mean), .groups = "drop") %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))

# Table A3: Response rates by ethnicity/gender --------------------------------

# Cross-tabulate ethnicity against completion and convert the counts to row
# shares (columns "0" and "1" of the table are the two values of `completed`).
completed.table <- as.data.frame.matrix(table(df2$ethnic_all, df2$completed))
ethnic_total <- completed.table[["0"]] + completed.table[["1"]]
completed.table <- data.frame(
  cat = row.names(completed.table),
  mean0 = completed.table[["0"]] / ethnic_total,
  mean1 = completed.table[["1"]] / ethnic_total,
  row.names = row.names(completed.table),
  stringsAsFactors = FALSE
)

# Same computation by the Femme. column; the two table rows are labelled
# "male" and "female" in that order.
completed.table2 <- as.data.frame.matrix(table(df2$Femme., df2$completed))
gender_total <- completed.table2[["0"]] + completed.table2[["1"]]
completed.table2 <- data.frame(
  cat = c("male", "female"),
  mean0 = completed.table2[["0"]] / gender_total,
  mean1 = completed.table2[["1"]] / gender_total,
  row.names = row.names(completed.table2),
  stringsAsFactors = FALSE
)

# Stack both breakdowns and give the columns presentation names.
completed.table <- rbind(completed.table, completed.table2)
names(completed.table) <- c("category", "not completed", "completed")

completed.table %>% mutate(across(where(is.numeric), ~ round(.x, 2)))

# Table A4: Summary Statistics of Variables of Interest --------------------------------

#df<-read_csv("../Enumerators_Cleaned_v8.csv")

# Outcome variable groups. Pattern-based groups are taken from df's column
# names in data-frame order.
dvs.c <- c("challenges_5", "Local_challenges_index")
dvs.c2 <- grep("Local_Chall_", colnames(df), value = TRUE)

dvs.s <- c(
  grep("break_protocol", colnames(df), value = TRUE),
  grep("solution_threats", colnames(df), value = TRUE)[1:5]
)
dvs.p <- "difficulties_coll_6"
dvs.a <- c("affiliations_1", "affiliations_2", "affiliations_3")
dvs.dif <- grep("difficulties_coll", colnames(df), value = TRUE)[1:6]

# Variables reported in the table, in row order.
all_dvs <- c(dvs.c2, dvs.s, dvs.dif, dvs.a)

# Row labels, matched to all_dvs purely by position.
var.names <- c(
  "Chiefs",
  "Party leaders",
  "Mayors",
  "Prefets",
  "Police",
  "Youth",
  "Respondent families",
  "Respondent",
  "Other community members",
  "Skip HH",
  "Fabricate Answers",
  "Adapt or rephrase",
  "Abandon",
  "Deviate random-walk",
  "Lie to supervisor",
  "Select easier respondent",
  "Ease questions",
  "Skip HH",
  "Fill in answers",
  "Skip questions",
  "Choose easier respondents",
  "Abbreviate questions/consent",
  "Complicated Qs",
  "Not relevant",
  "Rephrasing needed",
  "Too sensitive",
  "Culturally irrelevant",
  "Psychologically challenging",
  "Emphasized ethnicity",
  "Emphasized partisanship",
  "Emphasized religion"
)

# One row per variable; keep five descriptive statistics from stat.desc().
stat_cols <- c("mean", "min", "max", "std.dev", "nbr.val")
summary_stats <- as.data.frame(t(stat.desc(df[all_dvs]))[, stat_cols])
summary_stats$desc <- var.names
summary_stats <- summary_stats[c("desc", stat_cols)]
colnames(summary_stats) <- c("description", "mean", "min", "max", "std.dev", "n")

summary_stats %>% mutate(across(where(is.numeric), ~ round(.x, 2)))

# Figure A1: Generalizability: Afrobarometer levels of insecurity -------------------------------

# Load the Afrobarometer Round 7 merged data and keep only the variables used
# below. If a local copy is present in the working directory it is used;
# otherwise the file is read directly from the Afrobarometer website (internet
# connection required).
afro_vars <- c("COUNTRY", "Q10A", "Q10B",
               "Q11A", "Q11B",
               "Q107E", "Q107D",
               "Q55PT1", "Q55PT2",
               "LOCATION.LEVEL.1",
               "Q110A", "Q110E", "Q112")

afro_local <- "r7_merged_data_34ctry.release.sav"
afro_url <- "https://www.afrobarometer.org/wp-content/uploads/2022/02/r7_merged_data_34ctry.release.sav"
afro_path <- if (file.exists(afro_local)) afro_local else afro_url

# foreign::read.spss() has no `col_select` argument and returns a plain list
# by default, which dplyr::select() cannot handle. Convert to a data frame
# first (the same conversion the recoding step below applies), then subset.
afro <- read.spss(afro_path) %>%
  data.frame() %>%
  dplyr::select(all_of(afro_vars))


# Recode the Afrobarometer items into interview-level 0/1 indicators.
# NOTE(review): if Q10A/Q10B/Q11A/Q11B arrive as factors, as.numeric() yields
# level positions rather than the original codes -- confirm the cut-offs below
# against the codebook.
afro <- afro %>%
  data.frame() %>%
  mutate_at(c("Q10A", "Q10B", "Q11A", "Q11B"), as.numeric) %>%
  # Either Q107E or Q107D recorded as "Yes".
  mutate(threat = ifelse(Q107E == "Yes" | Q107D == "Yes", 1, 0)) %>%
  # Values of 7 or more are set to missing; 3-6 count as 1. Because %in% never
  # returns NA, missing responses are coded 0 in the indicator.
  mutate(
    Q10A = ifelse(Q10A >= 7, NA, Q10A),
    unsafe = ifelse(Q10A %in% 3:6, 1, 0)
  ) %>%
  mutate(
    Q10B = ifelse(Q10B >= 7, NA, Q10B),
    crime = ifelse(Q10B %in% 3:6, 1, 0)
  ) %>%
  # Values of 6 or more are set to missing; 3-5 count as 1.
  mutate(
    Q11B = ifelse(Q11B >= 6, NA, Q11B),
    attacked = ifelse(Q11B %in% 3:5, 1, 0)
  ) %>%
  mutate(
    insecurity = ifelse(unsafe == 1 | attacked == 1, 1, 0),
    hostile = ifelse(Q110A == "Hostile", 1, 0),
    suspicious = ifelse(Q110E == "Suspicious", 1, 0),
    # Constant column of ones, one per interview.
    N = 1
  )

# Keywords searched for (case-insensitively, as substrings) in Q55PT1/Q55PT2.
insecurity_terms <- c("violence", "security", "instability", "war")

# TRUE for each element of x that contains at least one of the keywords.
mentions_insecurity <- function(x) {
  Reduce(`|`, lapply(insecurity_terms, grepl, x = x, ignore.case = TRUE))
}

# 1 when either response mentions one of the keywords, 0 otherwise.
afro$insecurity_issue <- ifelse(
  mentions_insecurity(afro$Q55PT1) | mentions_insecurity(afro$Q55PT2),
  1, 0
)

# 1 when the interview is flagged on `insecurity` or on `insecurity_issue`.
afro$insecurity_concern_exp <- ifelse(
  afro$insecurity == 1 | afro$insecurity_issue == 1,
  1, 0
)


# Mean of each indicator within COUNTRY x LOCATION.LEVEL.1.
region_means <- afro %>%
  dplyr::select(COUNTRY, LOCATION.LEVEL.1, insecurity, insecurity_issue,
                insecurity_concern_exp, crime, N) %>%
  group_by(COUNTRY, LOCATION.LEVEL.1) %>%
  summarise_if(is.numeric, mean, na.rm = TRUE)

# The summary stays grouped by COUNTRY, so each mean() below is a
# within-country mean: a location is flagged 1 when it lies above its own
# country's average.
afro2 <- region_means %>%
  mutate(insecurity2 = ifelse(insecurity > mean(insecurity), 1, 0)) %>%
  mutate(insecurity_issue2 = ifelse(insecurity_issue > mean(insecurity_issue), 1, 0)) %>%
  mutate(insecurity_concern_exp2 = ifelse(
    insecurity_concern_exp > mean(insecurity_concern_exp), 1, 0
  )) %>%
  mutate(crime2 = ifelse(crime > mean(crime), 1, 0))


# Per country: share of locations flagged as above the country average on each
# indicator. N2 counts the locations in the country.
flag_cols <- c("N2", "insecurity2", "insecurity_issue2",
               "insecurity_concern_exp2", "crime2")

afro_EA_high <- afro2 %>%
  mutate(N2 = 1) %>%
  group_by(COUNTRY) %>%
  summarise_at(flag_cols, sum, na.rm = TRUE) %>%
  mutate(
    insecurity = insecurity2 / N2,
    insecurity_issue = insecurity_issue2 / N2,
    insecurity_concern_exp = insecurity_concern_exp2 / N2,
    crime = crime2 / N2
  ) %>%
  select(COUNTRY, insecurity, insecurity_issue, insecurity_concern_exp, crime) %>%
  # Marker for the country whose name contains "ivoire" (case-insensitive).
  mutate(civ = ifelse(grepl("ivoire", COUNTRY, ignore.case = TRUE), 1, 0))



# Components shared by the three country bar charts.
# Fill: civ == 0 -> white, civ == 1 -> gray.
civ_fill <- scale_fill_manual(values = c("white", "gray"), name = "")
# Horizontal bars, no axis titles, no legend.
flipped_bare <- list(
  ylab(""),
  coord_flip(),
  xlab(""),
  theme_bw(),
  theme(legend.position = "none")
)

# Each panel: countries ordered by the plotted share, with a dotted line at the
# unweighted mean of the country-level shares.
p4 <- ggplot(
  afro_EA_high,
  aes(x = reorder(factor(COUNTRY), insecurity), y = insecurity, fill = factor(civ))
) +
  geom_bar(position = position_dodge(), stat = "identity", color = "black") +
  geom_hline(
    yintercept = mean(afro_EA_high$insecurity),
    linetype = "dotted", color = "blue", size = 1.5
  ) +
  civ_fill +
  ggtitle("Share of enumeration areas with high reports of feeling unsafe or attacked") +
  flipped_bare

p5 <- ggplot(
  afro_EA_high,
  aes(x = reorder(factor(COUNTRY), insecurity_issue), y = insecurity_issue, fill = factor(civ))
) +
  geom_bar(position = position_dodge(), stat = "identity", color = "black") +
  geom_hline(
    yintercept = mean(afro_EA_high$insecurity_issue),
    linetype = "dotted", color = "blue", size = 1.5
  ) +
  civ_fill +
  ggtitle("Share of enumeration areas with high reports of insecurity main issue") +
  flipped_bare

p6 <- ggplot(
  afro_EA_high,
  aes(x = reorder(factor(COUNTRY), crime), y = crime, fill = factor(civ))
) +
  geom_bar(position = position_dodge(), stat = "identity", color = "black") +
  geom_hline(
    yintercept = mean(afro_EA_high$crime),
    linetype = "dotted", color = "blue", size = 1.5
  ) +
  civ_fill +
  ggtitle("Share of enumeration areas with high reports of victim of crime") +
  flipped_bare

plot_grid(p4, p5, p6)

# Table A5: Generalizability: Google Scholar Searches -------------------------------

# Read the Google Scholar search table and print it with numeric columns
# rounded to four decimals.
google <- read.csv("google_searches_2.csv")

google %>% mutate(across(where(is.numeric), ~ round(.x, 4)))

# Figure A2: Demographic correlates of violence experience -------------------------------


# Covariates, each entered on its own in a bivariate regression.
ivs <- c(
  "experience_years", "sex", "dept_abidjan", "projectN", "akan",
  "nmande", "smande", "krou", "voltaique", "govt", "christian"
)

# One lm(violence_index1 ~ x) per covariate; keep the slope row of each fit and
# relabel it with the covariate's name (relies on one "x" row per model, in
# the same order as ivs).
corre <- lapply(df[ivs], function(x) tidy(lm(violence_index1 ~ x, df))) %>%
  bind_rows() %>%
  filter(term == "x") %>%
  mutate(term = ivs, model = "Violence Experience Index")

# Dot-and-whisker plot with a dashed reference line at zero.
zero_line <- geom_vline(xintercept = 0, colour = "grey60", linetype = 2)

dwplot(corre, dot_args = list(size = 3), vline = zero_line) +
  theme_bw() +
  scale_color_manual(values = c("black"), guide = guide_legend(reverse = TRUE)) +
  theme(
    axis.text = element_text(size = 10),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    legend.title = element_blank()
  )


# Figure A3: Respondent home dept and work depts----------

dept_centroids <- read.csv("depts_base_worked.csv")
civ.df <- read.csv("CIV_map.csv")

# Rows with a non-missing count for each map.
home_depts <- subset(dept_centroids, !is.na(base))
work_depts <- subset(dept_centroids, !is.na(work))

# Theme shared by both maps: no grid, no axis text/ticks/titles, boxed legend
# placed inside the panel.
map_theme <- list(
  theme_bw(),
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = c(.18, .25),
    legend.justification = c("right", "top"),
    legend.box.just = "right",
    legend.margin = margin(6, 6, 6, 6),
    legend.box.background = element_rect(color = "black", size = 1)
  )
)

# Map 1: points sized by `base`, labelled with the department name.
ggplot(civ.df) +
  aes(x = long, y = lat) +
  geom_polygon(aes(group = group), colour = "black", fill = NA) +
  geom_path(
    data = civ.df, aes(x = long, y = lat, group = group),
    color = "gray", size = .5
  ) +
  geom_point(
    data = home_depts, aes(x = long, y = lat, size = base),
    alpha = 0.5, color = "darkmagenta"
  ) +
  geom_text(
    data = home_depts, aes(label = dept_name, x = long, y = lat),
    color = "black", size = 3.5, nudge_y = -0.04, fontface = "bold"
  ) +
  coord_map("mercator") +
  scale_size(
    range = c(1, 10), breaks = c(1, 15, 100),
    name = "Number of \nRespondents: \nhome department"
  ) +
  map_theme

# Map 2: points sized by `work`, labelled with the department name.
ggplot(civ.df) +
  aes(x = long, y = lat) +
  geom_polygon(aes(group = group), colour = "black", fill = NA) +
  geom_path(
    data = civ.df, aes(x = long, y = lat, group = group),
    color = "gray", size = .5
  ) +
  geom_point(
    data = work_depts, aes(x = long, y = lat, size = work),
    alpha = 0.5, color = "turquoise"
  ) +
  geom_text(
    data = work_depts, aes(label = dept_name, x = long, y = lat),
    color = "black", size = 3.5, nudge_y = -0.04, fontface = "bold"
  ) +
  coord_map("mercator") +
  scale_size(
    range = c(1, 10), breaks = c(7, 15, 30, 60, 90),
    name = "Number of \nRespondents: \nwork department"
  ) +
  map_theme



# Table A6: Summary statistics for firms worked at least once ----------

# Firm columns in df and their display labels (matched by position).
firms <- c("ipsos", "afro", "crefdi", "ins", "jpal", "ipa", "firm_other")
firms2 <- c(
  "IPSOS", "Afrobarometer", "CREFDI", "INS", "J-PAL", "IPA",
  "Independent Researchers"
)

# One row per firm column; keep five descriptive statistics from stat.desc().
firm_stat_cols <- c("mean", "min", "max", "std.dev", "nbr.val")
firm_stats <- as.data.frame(t(stat.desc(df[firms]))[, firm_stat_cols])
firm_stats$des <- firms2
firm_stats <- firm_stats[c("des", firm_stat_cols)]
colnames(firm_stats) <- c("description", "mean", "min", "max", "std.dev", "n")

firm_stats %>% mutate(across(where(is.numeric), ~ round(.x, 2)))

# Table A7: Summary statistics for Experience Topics ----------

# First 15 columns of df whose name contains "exp_topic_", in data-frame order.
topics <- grep("exp_topic_", colnames(df), value = TRUE)[1:15]

# Display labels, matched to `topics` by position.
topics2 <- c(
  "Agriculture",
  "Peacebuilding",
  "Conflict & Civil War",
  "Covid-19",
  "Demography",
  "Education",
  "Employment & Job Creation",
  "Environment",
  "Gender",
  "Marketing",
  "Poverty & Poverty Reduction",
  "Politics & Governance",
  "Social Service Delivery",
  "Health",
  "Other"
)

# One row per topic column; keep five descriptive statistics from stat.desc().
topic_stat_cols <- c("mean", "min", "max", "std.dev", "nbr.val")
topics_stats <- as.data.frame(t(stat.desc(df[topics]))[, topic_stat_cols])
topics_stats$des <- topics2
topics_stats <- topics_stats[c("des", topic_stat_cols)]
colnames(topics_stats) <- c("description", "mean", "min", "max", "std.dev", "n")

topics_stats %>% mutate(across(where(is.numeric), ~ round(.x, 2)))


# Table A8: Summary statistics for Safety Challenges -----

# `safe` plus the first nine columns whose name starts with "safe_why_".
safe <- c("safe", grep("^safe_why_", colnames(df), value = TRUE)[1:9])

# Display labels, matched to `safe` by position.
safe2 <- c(
  "Felt unsafe",
  "Unsafe - politics",
  "Unsafe - unwelcomed",
  "Unsafe - isolation",
  "Unsafe - harassment",
  "Unsafe - witchcraft",
  "Unsafe - Respondents",
  "Unsafe - violence",
  "Unsafe - crime",
  "Unsafe - Other"
)

# One row per variable; keep five descriptive statistics from stat.desc().
safety_stat_cols <- c("mean", "min", "max", "std.dev", "nbr.val")
safety_stats <- as.data.frame(t(stat.desc(df[safe]))[, safety_stat_cols])
safety_stats$desc <- safe2
safety_stats <- safety_stats[c("desc", safety_stat_cols)]
colnames(safety_stats) <- c("description", "mean", "min", "max", "std.dev", "n")

safety_stats %>% mutate(across(where(is.numeric), ~ round(.x, 2)))

# Table A9: Summary statistics for Leave/Leave Why----

# `leave`, the first ten columns whose name contains "leave_why_", and
# `leave_left`.
leave_why <- c(
  "leave",
  grep("leave_why_", colnames(df), value = TRUE)[1:10],
  "leave_left"
)

# Display labels, matched to `leave_why` by position.
leave_why2 <- c(
  "Considered Leaving",
  "Insufficient pay",
  "Sensitive topic",
  "Feared safety",
  "Uncomfortable",
  "Low quality survey",
  "Time commitment too long",
  "Travel distance",
  "Research team disputes",
  "Personal issues",
  "Other",
  "Actually left job"
)

# One row per variable; keep five descriptive statistics from stat.desc().
leave_stat_cols <- c("mean", "min", "max", "std.dev", "nbr.val")
leave_why_stats <- as.data.frame(t(stat.desc(df[leave_why]))[, leave_stat_cols])
leave_why_stats$des <- leave_why2
leave_why_stats <- leave_why_stats[c("des", leave_stat_cols)]
colnames(leave_why_stats) <- c("description", "mean", "min", "max", "std.dev", "n")

leave_why_stats %>% mutate(across(where(is.numeric), ~ round(.x, 2)))



# Figure A4: Violence Experience and Labor Conditions ----

# Of the columns whose name starts with "challenges_", take the 2nd-9th match
# and then drop the 5th of those (i.e. the 6th match overall).
# NOTE(review): which challenges_* variables this selects depends on column
# order in df; the label map below names challenges_1-4 and 6-9 -- confirm the
# selection matches.
labor <- grep("^challenges_", colnames(df), value = TRUE)[2:9][-5]

# One lm(violence_index1 ~ x) per selected column; keep each slope row and
# relabel it with the column name.
corre4 <- lapply(df[labor], function(x) tidy(lm(violence_index1 ~ x, df))) %>%
  bind_rows() %>%
  filter(term == "x") %>%
  mutate(term = labor, model = "Violence Experience Index")

# Dashed reference line at zero.
labor_zero_line <- geom_vline(xintercept = 0, colour = "grey60", linetype = 2)

dwplot(corre4, dot_args = list(size = 3), vline = labor_zero_line) %>%
  relabel_predictors(
    c(
      "(Intercept)" = "Intercept",
      challenges_1 = "Insufficient Pay ",
      challenges_2 = "Insufficient food/accommodation ",
      challenges_3 = "Limited cell phone reception ",
      challenges_4 = "Difficult travel conditions ",
      challenges_6 = "Uncomfortable working in assigned region",
      challenges_7 = "Time commitment was too long",
      challenges_8 = "Required to travel too much",
      challenges_9 = "Disputes with the research team"
    )
  ) +
  theme_bw() +
  scale_color_manual(values = c("black"), guide = guide_legend(reverse = TRUE)) +
  theme(
    axis.text = element_text(size = 10),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    legend.title = element_blank()
  )



# Figure A5: Enumerator enjoyment of job and research process ----

# Columns of df whose name contains "enjoy", dichotomised: values >= 2 -> 1,
# otherwise 0.
enjoy_cols <- grepl("enjoy", colnames(df))
data_enjoy <- lapply(df[, enjoy_cols], function(x) ifelse(x >= 2, 1, 0))

# Share of 1s per item.
data_enjoy <- data_enjoy %>%
  data.frame() %>%
  pivot_longer(exp_mostenjoyable_1:exp_mostenjoyable_7) %>%
  group_by(name) %>%
  summarise_at("value", mean, na.rm = TRUE)

data_enjoy$name <- forcats::fct_rev(data_enjoy$name)

# Axis labels, matched by position to the "enjoy" columns of df.
enjoy_labels <- c(
  "Travel country",
  "Learning about fellow citizens",
  "Involvement in research process",
  "Pay",
  "Professional skills",
  "Improve country",
  "Flexible work schedule"
)

# Horizontal bars ordered by share, with the percentage printed inside each bar.
p1 <- ggplot(data_enjoy, aes(x = reorder(factor(name), value), y = value * 100)) +
  geom_bar(stat = "identity", width = .5) +
  geom_text(
    aes(label = paste0(round(value * 100, 0), "%")),
    fontface = "bold", hjust = .85, vjust = .5, color = "white", size = 4
  ) +
  scale_x_discrete(breaks = colnames(df)[enjoy_cols], labels = enjoy_labels) +
  ylim(0, 100) +
  coord_flip() +
  xlab("") +
  ylab("(%)") +
  ggtitle("Enumerator Enjoyment of Job") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15)
  )



# Columns of df whose name contains "research_process", dichotomised:
# values >= 2 -> 1, otherwise 0.
process_cols <- grepl("research_process", colnames(df))
data_research_process <- lapply(df[, process_cols], function(x) ifelse(x >= 2, 1, 0))

# Share of 1s per item.
data_research_process <- data_research_process %>%
  data.frame() %>%
  pivot_longer(research_process_1:research_process_4) %>%
  group_by(name) %>%
  summarise_at("value", mean, na.rm = TRUE)

data_research_process$name <- forcats::fct_rev(data_research_process$name)

# Axis labels, matched by position to the "research_process" columns of df.
process_labels <- c(
  "I am valuable to process",
  "Uncompensated work",
  "My knowledge is undervalued",
  "Studies can be improved with consultation"
)

# Horizontal bars ordered by share, with the percentage printed inside each bar.
p2 <- ggplot(
  data_research_process,
  aes(x = reorder(factor(name), value), y = value * 100)
) +
  geom_bar(stat = "identity", width = .5) +
  geom_text(
    aes(label = paste0(round(value * 100, 0), "%")),
    fontface = "bold", hjust = .85, vjust = .5, color = "white", size = 4
  ) +
  scale_x_discrete(breaks = colnames(df)[process_cols], labels = process_labels) +
  ylim(0, 100) +
  coord_flip() +
  xlab("") +
  ylab("(%)") +
  ggtitle("Enumerator feelings about \nresearch process") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15)
  )

# Both panels in one figure.
plot_grid(p1, p2)


# Table A10: Ease of Interviewing in-group ----

# Of the columns whose name contains "ease_resp_", keep the 1st and 3rd-6th.
ease_cols <- grep("ease_resp_", colnames(df), value = TRUE)[c(1, 3:6)]
ease1 <- df[, names(df) %in% ease_cols]

# One dummy column per observed value of each selected item.
ease <- dummy_cols(
  ease1,
  select_columns = grep("ease_resp_", colnames(ease1), value = TRUE)
)

# Share of respondents per item and answer value, reshaped to one row per item
# (3rd "_"-separated part of the dummy name) and one column per answer value
# (4th part). Dummies for missing values are dropped.
ease <- ease %>%
  pivot_longer(ease_resp_eth_1:ease_resp_edu_inverted_NA) %>%
  group_by(name) %>%
  summarise_at("value", mean, na.rm = TRUE) %>%
  filter(!grepl("NA", name)) %>%
  mutate(name = gsub("ease_resp_edu_inverted", "ease_resp_edu", name)) %>%
  mutate(
    reason = str_split(name, "_", simplify = TRUE)[, 3],
    type = str_split(name, "_", simplify = TRUE)[, 4]
  ) %>%
  select(reason, type, value) %>%
  pivot_wider(names_from = "type", values_from = "value")

colnames(ease) <- c("Group", "In-group_easier", "Same", "Out-group_easier")

ease %>% mutate(across(where(is.numeric), ~ round(.x, 2)))


# Figure A6: Afrobarometer: Threatened enumerators----

# Country-level mean of the interview-level `threat` indicator.
means_country <- afro %>%
  dplyr::select(COUNTRY, threat) %>%
  group_by(COUNTRY) %>%
  summarise_if(is.numeric, mean, na.rm = TRUE)

# Flag Cote d'Ivoire with a case-insensitive substring match, as done for
# afro_EA_high above; an exact comparison against the accented name fails
# silently if the file's encoding differs.
means_country$CIV <- ifelse(
  grepl("ivoire", means_country$COUNTRY, ignore.case = TRUE), 1, 0
)

# Horizontal bars ordered by share; CIV == 0 is drawn gray, CIV == 1 white.
# theme_light() is a complete theme and must come BEFORE theme(): added
# afterwards it replaces every customisation made in theme().
ggplot(means_country, aes(x = reorder(COUNTRY, threat), y = threat, fill = factor(CIV))) +
  scale_fill_manual(values = c("gray", "white")) +
  geom_bar(position = position_dodge(), stat = "identity", color = "black") +
  ylab("Enum felt or was threatened (Share of interviews in country)") +
  coord_flip() +
  xlab("") +
  theme_light() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    text = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    panel.spacing = unit(2, "lines"),
    legend.position = "none",
    strip.text.x = element_text(size = 20, colour = "black"),
    strip.background = element_rect(colour = "black", fill = "white")
  ) +
  guides(fill = "none")


# Table A11: Respondent characteristics - share of enumerators----

# Per Q112 value: number of interviews flagged on each indicator.
enum_sum1 <- afro %>%
  dplyr::select(Q112, threat, hostile, suspicious) %>%
  group_by(Q112) %>%
  summarise_if(is.numeric, sum, na.rm = TRUE)

# Collapse the counts to 0/1: 1 when the Q112 value has at least one flagged
# interview.
enum_sum <- enum_sum1 %>%
  mutate(across(c(threat, hostile, suspicious), ~ ifelse(.x > 0, 1, 0)))

# Descriptive statistics across Q112 values; the first row (Q112 itself) is
# dropped, leaving the three indicators.
enum_stat_cols <- c("mean", "min", "max", "std.dev", "nbr.val")
enum_sum <- as.data.frame(t(stat.desc(enum_sum))[, enum_stat_cols])
enum_sum <- enum_sum[2:4, ]
colnames(enum_sum) <- c("mean", "min", "max", "std.dev", "n")

enum_sum %>% mutate(across(where(is.numeric), ~ round(.x, 2)))

# Figure A7: Afrobarometer: Insecurity, Enumerator-level respondent characteristics----


# Location-level means of each indicator (COUNTRY x LOCATION.LEVEL.1). The
# summary stays grouped by COUNTRY, so each mean() below is a within-country
# mean. A location is marked high_insecurity = 1 when it is above its country's
# average on any of the four indicators. Only the flag is kept, so the
# per-indicator flag columns are not materialised.
afro3 <- afro %>%
  dplyr::select(COUNTRY, LOCATION.LEVEL.1, insecurity, insecurity_issue,
                insecurity_concern_exp, crime, N) %>%
  group_by(COUNTRY, LOCATION.LEVEL.1) %>%
  summarise_if(is.numeric, mean, na.rm = TRUE) %>%
  mutate(
    high_insecurity = ifelse(
      insecurity > mean(insecurity) |
        insecurity_issue > mean(insecurity_issue) |
        insecurity_concern_exp > mean(insecurity_concern_exp) |
        crime > mean(crime),
      1, 0
    )
  ) %>%
  dplyr::select(COUNTRY, LOCATION.LEVEL.1, high_insecurity)

# Per Q112 x location x country: number of flagged interviews on each
# indicator, plus 0/1 versions (1 = at least one flagged interview).
enum2 <- afro %>%
  dplyr::select(Q112, COUNTRY, LOCATION.LEVEL.1, threat, hostile, suspicious) %>%
  group_by(Q112, LOCATION.LEVEL.1, COUNTRY) %>%
  summarise_if(is.numeric, sum, na.rm = TRUE) %>%
  mutate(
    threatened = ifelse(threat > 0, 1, 0),
    hostile2 = ifelse(hostile > 0, 1, 0),
    suspicious2 = ifelse(suspicious > 0, 1, 0)
  )

# Attach the location-level high_insecurity flag.
enum2 <- merge(enum2, afro3, by = c("COUNTRY", "LOCATION.LEVEL.1"))

# Split by the flag and collect the distinct Q112 values seen in each part.
enum2.i <- filter(enum2, high_insecurity == 1)
enum2.s <- filter(enum2, high_insecurity == 0)

enum2_insecure <- unique(enum2.i$Q112)
enum2.s_secure <- unique(enum2.s$Q112)


# Number of distinct Q112 values per country, counted separately for those
# observed in high-insecurity locations and those observed in low-insecurity
# locations.
afro_N <- afro %>%
  filter(Q112 %in% enum2_insecure) %>%
  dplyr::select(COUNTRY, Q112) %>%
  group_by(COUNTRY) %>%
  dplyr::summarise(count_insecure = n_distinct(Q112))

afro_N.s <- afro %>%
  filter(Q112 %in% enum2.s_secure) %>%
  dplyr::select(COUNTRY, Q112) %>%
  group_by(COUNTRY) %>%
  dplyr::summarise(count_secure = n_distinct(Q112))

# Stack the two parts back together (rows with a missing flag are not in
# either part).
enum_all <- rbind(enum2.i, enum2.s)

# merge() keeps only countries that appear in both count tables.
afro_N <- merge(afro_N, afro_N.s, by = "COUNTRY")


# Per country x Q112 x insecurity level: 1 when that Q112 value has at least
# one location flagged on the indicator, else 0.
per_enum <- enum_all %>%
  group_by(COUNTRY, Q112, high_insecurity) %>%
  summarise_at(c("threatened", "hostile2", "suspicious2"), sum, na.rm = TRUE) %>%
  mutate(
    N_threat = ifelse(threatened > 0, 1, 0),
    N_hostile = ifelse(hostile2 > 0, 1, 0),
    N_suspicious = ifelse(suspicious2 > 0, 1, 0)
  )

# Count flagged Q112 values per country x insecurity level, in long format;
# `var` is the indicator name without its "N_" prefix.
enum4 <- per_enum %>%
  group_by(COUNTRY, high_insecurity) %>%
  summarise_at(c("N_threat", "N_hostile", "N_suspicious"), sum, na.rm = TRUE) %>%
  pivot_longer(N_threat:N_suspicious) %>%
  mutate(var = stringr::str_split(name, "_", simplify = TRUE)[, 2])

# Attach the per-country denominators.
enum4 <- merge(afro_N, enum4, by = "COUNTRY")



# Share of Q112 values flagged on each indicator, by country and insecurity
# level: the count is divided by the matching per-country denominator
# (count_insecure for high_insecurity == 1, count_secure otherwise).
# theme_light() is a complete theme and must come BEFORE theme(): added
# afterwards it replaces every customisation made in theme() (text sizes,
# panel spacing, strip styling).
enum4 %>%
  dplyr::select(COUNTRY, count_insecure, count_secure, high_insecurity, value, var) %>%
  mutate(
    value2 = ifelse(high_insecurity == 1, value / count_insecure, value / count_secure)
  ) %>%
  ggplot(aes(x = var, y = value2, fill = factor(high_insecurity))) +
  scale_fill_manual(
    values = c("white", "gray"), name = "",
    labels = c("Low insecurity", "High insecurity")
  ) +
  geom_bar(stat = "identity", position = "dodge", width = .5, color = "black") +
  ylab("Respondent characteristics (Share of enumerators)") +
  xlab("") +
  facet_wrap(~COUNTRY, scales = "free_x") +
  theme_light() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    text = element_text(size = 20),
    axis.title.x = element_text(size = 20),
    panel.spacing = unit(2, "lines"),
    strip.text.x = element_text(size = 20, colour = "black"),
    strip.background = element_rect(colour = "black", fill = "white")
  )
